#loading the libraries.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(readxl)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(treemapify)
library(dplyr)
# Load the electronics data set.
electronics <- read_xlsx("C:\\Users\\User\\Downloads\\electronics.xlsx")
colnames(electronics)
## [1] "Week" "No. of Commercials" "Sales Volume"
# View() opens a data viewer, so only call it in an interactive session;
# this lets the script also be run non-interactively.
if (interactive()) {
  View(electronics)
}
# Convert the column names to lowerCamelCase (e.g. noOfCommercials,
# salesVolume) so they can be used as bare names in aes().
electronics <- clean_names(electronics, "lower_camel")
# Scatter plot with a linear trend line and no confidence band.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
scatter <- ggplot(
  data = electronics,
  mapping = aes(x = noOfCommercials, y = salesVolume)
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  labs(x = "Number of Commercials", y = "Sales Volume")
ggplotly(scatter)
## `geom_smooth()` using formula = 'y ~ x'
# Load the kirklandregional data set.
kirklandregional <- read_xlsx("C:\\Users\\User\\Downloads\\kirklandregional.xlsx")
# Data cleaning: convert the column names to lowerCamelCase.
kirklandregional <- clean_names(kirklandregional, "lower_camel")
colnames(kirklandregional)
## [1] "month" "north" "south"
unique(kirklandregional$month)
## [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
# Store month as an ordered factor so the months are laid out in calendar
# order rather than alphabetically. month.abb is the base R constant
# c("Jan", ..., "Dec"); `levels` is spelled out in full instead of relying
# on partial argument matching.
kirklandregional$month <- factor(
  kirklandregional$month,
  levels = month.abb,
  ordered = TRUE
)
# Plotting line graphs: one chart per column (north, south).
# group = 1 puts every month into a single group so geom_line() connects
# the points even though month is a factor.
line1 <- ggplot(kirklandregional, aes(x = month, y = north, group = 1)) +
  geom_line(colour = "blue") +
  xlab("Month") +
  ylab("North")
ggplotly(line1)

line2 <- ggplot(kirklandregional, aes(x = month, y = south, group = 1)) +
  geom_line(colour = "orange") +
  xlab("Month") +
  ylab("South")
ggplotly(line2)
# Combined chart: the north (blue) and south (orange) series drawn on the
# same axes. The colours are fixed constants, so no legend is produced.
line3 <- ggplot(data = kirklandregional, mapping = aes(x = month, group = 1)) +
  geom_line(aes(y = north), color = "blue") +
  geom_line(aes(y = south), color = "orange") +
  labs(x = "Month", y = "Sales ($10000)")
ggplotly(line3)
# Load the accounts_managed data set.
accounts_managed <- read_xlsx("C:\\Users\\User\\Downloads\\accounts_managed.xlsx")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(accounts_managed)
}
accounts_managed <- clean_names(accounts_managed, "lower_camel")
colnames(accounts_managed)
## [1] "manager" "accountsManaged"
# Horizontal bar chart: one bar per manager, with the managers ordered by
# their accountsManaged value via reorder().
# geom_col() draws bar heights straight from the data; it is the dedicated
# form of geom_bar(stat = "identity").
bar <- ggplot(
  data = accounts_managed,
  mapping = aes(
    x = accountsManaged,
    y = reorder(manager, accountsManaged)
  )
) +
  geom_col(fill = "skyblue") +
  labs(x = "Accounts Managed", y = "Managers")
ggplotly(bar)
# Load the billionaires data set and convert its column names to
# lowerCamelCase.
billionaires <- read_xlsx("C:\\Users\\User\\Downloads\\billionaires.xlsx") %>%
  clean_names("lower_camel")
# Bubble chart: billionaires per 10M residents against per-capita income,
# with point size taken from numberOfBillionaires and colour from country.
# The legend is switched off in the theme.
Buuble_chart <- ggplot(
  data = billionaires,
  mapping = aes(
    x = billionairesPer10MResidents,
    y = perCapitaIncome,
    size = numberOfBillionaires,
    color = country
  )
) +
  geom_point(alpha = 0.7) +
  scale_size(name = "Number of Billionaires") +
  theme(legend.position = "none") +
  labs(x = "Billionaires Per 10M Residents", y = "Per Capita Income")
ggplotly(Buuble_chart)
# Column charts using the kirklandregional data set.
# First unpivot the table: the north and south columns become rows, with the
# former column name in `direction` and the cell value in `values`.
# The two columns are selected by bare name rather than by a range of quoted
# strings, and the labels are capitalised in the same pipeline.
Newkirklandregional <- kirklandregional %>%
  pivot_longer(
    cols = c(north, south),
    names_to = "direction",
    values_to = "values"
  ) %>%
  mutate(direction = if_else(direction == "north", "North", "South"))
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Newkirklandregional)
}
# Stacked column diagram: one column per month, segments filled by direction.
stacked <- ggplot(
  data = Newkirklandregional,
  mapping = aes(x = month, y = values, fill = direction)
) +
  geom_col() +
  labs(
    title = "Stacked Column Diagram",
    x = "Month",
    y = "Values",
    fill = "Direction"
  )
ggplotly(stacked)
# Clustered column diagram: same data as the stacked chart, but the
# direction bars are dodged side by side within each month.
clustered <- ggplot(
  data = Newkirklandregional,
  mapping = aes(x = month, y = values, fill = direction)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Clustered Column Diagram",
    x = "Month",
    y = "Values",
    fill = "Direction"
  )
ggplotly(clustered)
# Load the global100 data set and inspect its structure.
global100 <- read_xlsx("C:\\Users\\User\\Downloads\\global100.xlsx")
str(global100)
## tibble [100 × 4] (S3: tbl_df/tbl/data.frame)
## $ Continent : chr [1:100] "Asia" "Asia" "Asia" "Asia" ...
## $ Country : chr [1:100] "China" "China" "China" "China" ...
## $ Company : chr [1:100] "Agricultural Bank of China" "Bank of China" "China Construction Bank" "ICBC" ...
## $ Market Value (Billions US $): num [1:100] 141 124 174 216 202 ...
# Convert the column names to snake_case so they can be used in aes().
global100 <- global100 %>%
  clean_names("snake")
# Tree map: tile area from market value, fill from continent, and the
# company name as the tile label. The plot is printed directly.
ggplot(
  data = global100,
  mapping = aes(
    area = market_value_billions_us,
    fill = continent,
    label = company
  )
) +
  geom_treemap(color = "grey") +
  geom_treemap_text(size = 8) +
  labs(fill = "Continent")

# Load the worldgdp2014 data set and inspect its structure.
worldgdp2014 <- read_xlsx("C:\\Users\\User\\Downloads\\worldgdp2014.xlsx")
str(worldgdp2014)
## tibble [190 × 3] (S3: tbl_df/tbl/data.frame)
## $ Country Name : chr [1:190] "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ GDP 2014 (Billions US $): num [1:190] 20.05 13.22 213.52 126.78 1.22 ...
## $ GDP Growth 2014 (%) : num [1:190] -1.71 1.91 1.81 1.43 3.55 ...
# Convert the column names to snake_case.
worldgdp2014 <- worldgdp2014 %>%
  clean_names("snake")
# Number of distinct country names in the table.
worldgdp2014 %>%
  pull(country_name) %>%
  n_distinct()
## [1] 190
# World map polygon data; its `region` column is renamed to country_name so
# it can serve as the join key against worldgdp2014.
mapdata <- map_data("world")
mapdata <- dplyr::rename(mapdata, country_name = region)
# Attach the GDP columns to every polygon vertex.
# NOTE(review): the join is an exact match on country_name, so a country
# spelled differently in the two sources would get no GDP values — verify.
mapdata1 <- left_join(mapdata, worldgdp2014, by = "country_name")
# Keep only rows where BOTH GDP columns are present. Each column is tested
# separately: combining them with `&` before is.na() coerces the numbers to
# logicals, and `NA & 0` is FALSE rather than NA, so a row with one value
# missing and the other equal to 0 would wrongly be kept.
mapdata1 <- mapdata1 %>%
  filter(
    !is.na(gdp_2014_billions_us),
    !is.na(gdp_growth_2014_percent)
  )
# Choropleth map: country polygons filled by 2014 GDP on a yellow-to-red
# gradient, with axis titles, axis text and all rect elements blanked out.
map <- ggplot(
  data = mapdata1,
  mapping = aes(x = long, y = lat, group = group, label = country_name)
) +
  geom_polygon(
    mapping = aes(fill = gdp_2014_billions_us),
    colour = "black"
  ) +
  scale_fill_gradient(
    name = "GDP 2014 Billions ($)",
    low = "yellow",
    high = "red"
  ) +
  theme(
    rect = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )
ggplotly(map)
# Load the homesalesstacked data set and convert its column names to
# snake_case.
homesalesstacked <- read_xlsx("C:\\Users\\User\\Downloads\\homesalesstacked.xlsx") %>%
  clean_names("snake")
# Box plot of selling price per location; y-axis labels use comma
# separators and the legend is switched off.
box <- ggplot(
  data = homesalesstacked,
  mapping = aes(x = location, y = selling_price, fill = location)
) +
  geom_boxplot() +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = "none") +
  labs(x = "Location", y = "Selling Price")
ggplotly(box)
# Reference:
# R Core Team (2023). R: A Language and Environment for
# Statistical Computing. R Foundation for Statistical Computing,
# Vienna, Austria. https://www.R-project.org/.